package com.openness.spider.commons;

import com.openness.crawler.url.TLDList;

/**
 * @author Rain
 * @version 2013-6-29 上午10:52:19
 */

public class UrlUtil {

	public static String getDomain(String url) {
		if (url == null || url.isEmpty()) {
			return null;
		}

		String domain = null;

		String subDomain = null;

		int domainStartIdx = url.indexOf("//") + 2;
		int domainEndIdx = url.indexOf('/', domainStartIdx);

		domain = url.substring(domainStartIdx, domainEndIdx);

		subDomain = "";

		String[] parts = domain.split("\\.");
		if (parts.length > 2) {
			domain = parts[parts.length - 2] + "." + parts[parts.length - 1];

			int limit = 2;

			if (TLDList.getInstance().contains(domain)) {
				domain = parts[parts.length - 3] + "." + domain;

				limit = 3;
			}

			for (int i = 0; i < parts.length - limit; i++) {
				if (subDomain.length() > 0) {
					subDomain += ".";
				}

				subDomain += parts[i];
			}
		}

		return domain;
	}

	public static String getSubDomain(String url) {
		if (url == null || url.isEmpty()) {
			return null;
		}

		String domain = null;

		String subDomain = null;

		int domainStartIdx = url.indexOf("//") + 2;
		int domainEndIdx = url.indexOf('/', domainStartIdx);

		domain = url.substring(domainStartIdx, domainEndIdx);

		subDomain = "";

		String[] parts = domain.split("\\.");
		if (parts.length > 2) {
			domain = parts[parts.length - 2] + "." + parts[parts.length - 1];

			int limit = 2;

			if (TLDList.getInstance().contains(domain)) {
				domain = parts[parts.length - 3] + "." + domain;

				limit = 3;
			}

			for (int i = 0; i < parts.length - limit; i++) {
				if (subDomain.length() > 0) {
					subDomain += ".";
				}

				subDomain += parts[i];
			}
		}

		return subDomain;
	}

	public static void main(String[] args) {
		String url = "http://news.qq.com/a/20130629/003329.htm";

		System.out.println("domain: " + UrlUtil.getDomain(url));

		System.out.println("subDomain: " + UrlUtil.getSubDomain(url));
	}

}
